* ------------------------------------------------------ *
* Create permutation test within-income, across Stimulus *
* ------------------------------------------------------ *

/* 
Generate diff-in-diff estimates of spending for every day, excluding holidays and actual analysis window, as a placebo test of the stimulus impact.
Do this for Stim 1 and then Stim 2 Q1 spending, and then randomly draw two (over all possible draws) and take difference between those estimates. Repeated for Q4 via loop.
Likewise for Stim 1 vs. Stim 3, and Stim 2. vs. Stim 3.
Graph Histogram of these placebo estimates compared to actual estimate found in stimulus analysis for Q1 and Q4.
*/

*---------------------
* Set globals 
*---------------------

* Set $root 
project figstabs, root
if (r(buildrunning)==0) include "${root}/code/config_interactive.do"

* Set globals
project, uses("${root}/code/set_globals.do")
include "${root}/code/set_globals.do"
local category "Policy"

* Create required subfolders
cap mkdir "${root}/results/`category'"
cap mkdir "${root}/results/paper numbers"
cap mkdir "${root}/results/paper numbers/`category'"

********************************************************************************
**# 1. Setup 
********************************************************************************

* Set seed 
set seed 1280  

* Create list of days in 2020-2021 
clear all 
set obs 731                               // 2020 is a leap year, so 366 + 365 days   
gen date = mdy(12, 31, 2019) + _n 
format date %td 
sum date 
assert r(min) == mdy(1, 1, 2020)
assert r(max) == mdy(12, 31, 2021)

* Exclude days that are in our actual analysis windows (including christmas)
drop if inrange(date, mdy(3, 21, 2020), mdy(5, 9, 2020)) 
drop if inrange(date, mdy(12, 4, 2020), mdy(1, 19, 2021)) 
drop if inrange(date, mdy(2, 20, 2021), mdy(4, 10, 2021))

* drop holiday windows 
* drop valentines day
drop if inrange(date, mdy(2, 12, 2021), mdy(2, 19, 2021))
* drop around Labor Day 
drop if inrange(date, mdy(8, 31, 2020), mdy(9, 7, 2020))
* drop around Halloween
drop if inrange(date, mdy(10, 31, 2020), mdy(11, 2, 2020))
* drop around Thanksgiving
drop if inrange(date, mdy(11, 24, 2020), mdy(12, 3, 2020))
* drop around Memorial Day
drop if inrange(date, mdy(5, 25, 2020), mdy(5, 31, 2020))
* drop around July 4
drop if inrange(date, mdy(7, 1, 2020), mdy(7, 6, 2020))

* After dropping all of the above, in the post-Covid period (i.e. >= March 2020), we essentially only have the following months for possible analysis windows
keep if (year(date) == 2020 & inrange(month(date), 8, 11)) | (year(date) == 2021 & inrange(month(date), 2, 3))

* Store list of dates 
glevelsof date, local(stim_dates)

*------------------------------------------------------------------------------*
* Prepare main dataset for permutation analysis 

project, uses("${root}/data/derived/Policy/stimulus_did_dataset.dta")
use "${root}/data/derived/Policy/stimulus_did_dataset.dta", clear

* Keep only the variables we need 
keep round income_q date dow_resid_2019 spend_pp treat 
gisid round income_q date

* Keep only the income quartiles we are interested in 
keep if inlist(income_q, 1, 4)

* Exclude days that are in our actual analysis windows (including christmas)
drop if inrange(date, mdy(3, 21, 2020), mdy(5, 9, 2020)) | inrange(date, mdy(3, 21, 2019), mdy(5, 9, 2019))
drop if inrange(date, mdy(12, 4, 2020), mdy(1, 19, 2021)) | inrange(date, mdy(12, 4, 2019), mdy(1, 19, 2020)) 
drop if inrange(date, mdy(2, 20, 2021), mdy(4, 10, 2021)) | inrange(date, mdy(2, 20, 2020), mdy(4, 10, 2020))

* drop holidays (thanksgiving, July 4, Valentines/presidents day)
* drop valentines/president's day
drop if inrange(date, mdy(2, 12, 2021), mdy(2, 19, 2021)) | inrange(date, mdy(2, 12, 2020), mdy(2, 19, 2020)) | inrange(date, mdy(2, 12, 2019), mdy(2, 19, 2019))
* drop around Thanksgiving
drop if inrange(date, mdy(11, 24, 2020), mdy(12, 5, 2020)) | inrange(date, mdy(11, 24, 2019), mdy(12, 5, 2019))
* drop around Labor Day
drop if inrange(date, mdy(8, 31, 2020), mdy(9, 7, 2020)) | inrange(date, mdy(8, 31, 2019), mdy(9, 7, 2019))
* drop around Halloween
drop if inrange(date, mdy(10, 31, 2020), mdy(11, 2, 2020)) | inrange(date, mdy(10, 31, 2019), mdy(11, 2, 2019))
* drop around Memorial Day
drop if inrange(date, mdy(5, 25, 2020), mdy(5, 31, 2020)) | inrange(date, mdy(5, 25, 2019), mdy(5, 31, 2019))
* drop around July 4
drop if inrange(date, mdy(7, 1, 2020), mdy(7, 6, 2020)) | inrange(date, mdy(7, 1, 2019), mdy(7, 6, 2019))

tempfile permutation_data 
save `permutation_data'

*------------------------------------------------------------------------------*
* Empty dataset to store regression estimates 
clear 

gen round = ""
gen income_q = .
gen specification = ""
gen stim_date = .
gen combined_dollar_rescaled = .
gen pre_period = .
gen post_period = .

tempfile estimates 
save `estimates', replace 


********************************************************************************
**# 2. Permutation analysis: estimates
********************************************************************************
local ticker = 0

foreach stim_date in `stim_dates' {
	
	*======================================================================*
	* Prepare data 
	*======================================================================		
	use `permutation_data', clear

	* Create stimulus dates in treatment and control years 
	gen stim_date = `stim_date'
	gen stim_date_control = mdy(month(stim_date), day(stim_date), year(stim_date) - 2) if stim_date >= mdy(2, 1, 2021)
	replace stim_date_control = mdy(month(stim_date), day(stim_date), year(stim_date) - 1) if missing(stim_date_control)

	* Keep only dates in the analysis window 
	gen analysis_window = inrange(date, stim_date - 25, stim_date + 24) | inrange(date, stim_date_control - 25, stim_date_control + 24) if inlist(round, "april", "march") 
	replace analysis_window = inrange(date, stim_date - 25, stim_date + 15) | inrange(date, stim_date_control - 25, stim_date_control + 15) if round == "january"
	keep if analysis_window == 1 
		
	* Create post indicator (post-stimulus or post-stimulus control, within the respective years)
	gen post = inrange(date, stim_date, stim_date + 24) | inrange(date, stim_date_control, stim_date_control + 24) if inlist(round, "april", "march") 
	replace post = inrange(date, stim_date, stim_date + 15) | inrange(date, stim_date_control, stim_date_control + 15) if round == "january" 
		
	* Create indicators for being within the first five days of the post-period / after the first five days of the post-period 
	gen first_five = (inrange(date, stim_date, stim_date + 4) | inrange(date, stim_date_control, stim_date_control + 4))
	gen after_first_five = (first_five == 0 & post == 1)
	assert first_five + after_first_five == post  
	
	* Normed spending adjusted for pre-trend 
	gen dow_resid_2019_notrend = .
	
	foreach round in april january march {
		
		* Drop placebo dates with fewer than 16 days in the post-period 
		gunique date if post == 1 & treat == 1 & round == "`round'"
		local post_period_days = r(unique)
		if `post_period_days' < 16 {
			continue
		}
		
		* Unique pre-period days 
		gunique date if post == 0 & treat == 1 & round == "`round'"
		local pre_period_days = r(unique)
		
		* Adjust for pre-trend if there are more than 20 days in the pre-period 
		if `pre_period_days' > 20 {
			foreach income_q in 1 4 {
				forval treat = 0/1 {
					reg dow_resid_2019 date if post == 0 & income_q == `income_q' & treat == `treat' & round == "`round'"
			
					predict detrended, resid 
					replace dow_resid_2019_notrend = detrended if income_q == `income_q' & treat == `treat' & round == "`round'"
					drop detrended
				}
			}
		}
		else replace dow_resid_2019_notrend = dow_resid_2019 if round == "`round'"
		
		* Regression estimate 
		foreach income_q in 1 4 {
			foreach depvar in dow_resid_2019_notrend dow_resid_2019 {
				
				if "`depvar'" == "dow_resid_2019_notrend" local spec = "detrended"
				else if "`depvar'" == "dow_resid_2019" local spec = "original"
		
				* Base spending levels per stimulus recipient in Jan 2019
				sum spend_pp if income_q == `income_q' & round == "`round'"
				local spend_pp = r(mean)
		
				* Diff-in-Diff
				reg `depvar' i.treat##i.first_five i.treat##i.after_first_five if income_q == `income_q' & round == "`round'", r
				lincom (1.treat#1.first_five * 5 + 1.treat#1.after_first_five * 26) * `spend_pp'
				local combined_dollar_unscaled = r(estimate)
 		
				* Record regression estimates 
				preserve 
			
				use `estimates', clear 
			
				* Advance ticker by 1 if we did not drop the placebo date 
				local ticker = `ticker' + 1
				set obs `ticker'
			
				* Save other information 
				replace round = "`round'" in `ticker'
				replace income_q = `income_q' in `ticker'
				replace specification = "`spec'" in `ticker'
				replace stim_date = `stim_date' in `ticker'
				replace pre_period = `pre_period_days' in `ticker'
				replace post_period = `post_period_days' in `ticker'
			
				* Dollar effect (31 days after stimulus, cumulative)			
				if "`round'" == "april" replace combined_dollar_rescaled = `combined_dollar_unscaled' * 1200 / 1200 in `ticker'
				if "`round'" == "january" replace combined_dollar_rescaled = `combined_dollar_unscaled' * 1200 / 600 in `ticker'
				if "`round'" == "march" replace combined_dollar_rescaled = `combined_dollar_unscaled' * 1200 / 1400 in `ticker'
			
				save `estimates', replace
				restore
			}
		}
	}		
}

use `estimates', clear 
format %td stim_date
save "${root}/data/derived/Policy/stimulus_permutation_dataset.dta", replace 
project, creates("${root}/data/derived/Policy/stimulus_permutation_dataset.dta")
		 
********************************************************************************
**# 3. Permutation analysis: differences
********************************************************************************

project, uses("${root}/data/derived/Policy/stimulus_permutation_dataset.dta")
use "${root}/data/derived/Policy/stimulus_permutation_dataset.dta", clear

* We should have fewer than 100 unique dates for each stimulus round 
foreach round in april january march {
	gunique stim_date if round == "`round'"
	local unique_`round' = r(unique)
	assert `unique_`round'' <= 100
}

* Save selected dates 
save "${root}/data/derived/Policy/stimulus_permutation_selected.dta", replace 
project, creates("${root}/data/derived/Policy/stimulus_permutation_selected.dta")

*------------------------------------------------------------------------------*
* Create empty dataset to store differences 

clear 
gen income_q = .
gen specification = ""
gen comparison = ""
gen diff = .

tempfile differences 
save `differences', replace

local ticker = 0

*==============================================================================*
* Stim 1 vs. Stim 2 
*==============================================================================*

project, uses("${root}/data/derived/Policy/stimulus_permutation_selected.dta")

* With fewer than 10,000 unique date pairs, we don't have to do any random sampling - just calculate all possible differences 
foreach income_q in 1 4 {
	foreach spec in "detrended" "original" {
		use "${root}/data/derived/Policy/stimulus_permutation_selected.dta" if income_q == `income_q' & specification == "`spec'" & inlist(round, "april", "january"), clear
		gisid round stim_date 
		sort round stim_date 
		by round: gen obs = _n 
	
		forval i = 1/`unique_april' {
			forval j = 1/`unique_january' {
			
				* Take difference 
				sum combined_dollar_rescaled if round == "april" & obs == `i'
				local april_estimate = r(mean)
				sum combined_dollar_rescaled if round == "january" & obs == `j'
				local january_estimate = r(mean)
				local stim1v2 = `april_estimate' - `january_estimate'	
			
				* Save difference 
				preserve 
		
				use `differences', clear 
		
				local ticker = `ticker' + 1
				set obs `ticker'
		
				replace income_q = `income_q' in `ticker'
				replace specification = "`spec'" in `ticker'
				replace comparison = "Stim 1 v 2" in `ticker'
				replace diff = `stim1v2' in `ticker'
		
				save `differences', replace 
				restore 
			}
		}
	} 
}

*==============================================================================*
* Stim 1 vs. Stim 3 
*==============================================================================*

project, uses("${root}/data/derived/Policy/stimulus_permutation_selected.dta")

* With fewer than 10,000 unique date pairs, we don't have to do any random sampling - just calculate all possible differences 
foreach income_q in 1 4 {
	foreach spec in "detrended" "original" {
		use "${root}/data/derived/Policy/stimulus_permutation_selected.dta" if income_q == `income_q' & specification == "`spec'" & inlist(round, "april", "march"), clear
		gisid round stim_date 
		sort round stim_date 
		by round: gen obs = _n 
	
		forval i = 1/`unique_april' {
			forval j = 1/`unique_march' {
			
				* Take difference 
				sum combined_dollar_rescaled if round == "april" & obs == `i'
				local april_estimate = r(mean)
				sum combined_dollar_rescaled if round == "march" & obs == `j'
				local march_estimate = r(mean)
				local stim1v3 = `april_estimate' - `march_estimate'	
			
				* Save difference 
				preserve 
		
				use `differences', clear 
		
				local ticker = `ticker' + 1
				set obs `ticker'
		
				replace income_q = `income_q' in `ticker'
				replace specification = "`spec'" in `ticker'
				replace comparison = "Stim 1 v 3" in `ticker'
				replace diff = `stim1v3' in `ticker'
		
				save `differences', replace 
				restore 
			}
		}
	} 
}

*==============================================================================*
* Stim 2 vs. Stim 3 
*==============================================================================*

project, uses("${root}/data/derived/Policy/stimulus_permutation_selected.dta")

* With fewer than 10,000 unique date pairs, we don't have to do any random sampling - just calculate all possible differences 
foreach income_q in 1 4 {
	foreach spec in "detrended" "original" {
		use "${root}/data/derived/Policy/stimulus_permutation_selected.dta" if income_q == `income_q' & specification == "`spec'" & inlist(round, "january", "march"), clear
		gisid round stim_date 
		sort round stim_date 
		by round: gen obs = _n 
	
		forval i = 1/`unique_january' {
			forval j = 1/`unique_march' {
			
				* Take difference 
				sum combined_dollar_rescaled if round == "january" & obs == `i'
				local january_estimate = r(mean)
				sum combined_dollar_rescaled if round == "march" & obs == `j'
				local march_estimate = r(mean)
				local stim2v3 = `january_estimate' - `march_estimate'	
			
				* Save difference 
				preserve 
		
				use `differences', clear 
		
				local ticker = `ticker' + 1
				set obs `ticker'
		
				replace income_q = `income_q' in `ticker'
				replace specification = "`spec'" in `ticker'
				replace comparison = "Stim 2 v 3" in `ticker'
				replace diff = `stim2v3' in `ticker'
		
				save `differences', replace 
				restore 
			}
		}
	} 
}

********************************************************************************
**# 4. Histograms 
********************************************************************************

use `differences', clear 

* Number of observations: no. of comparisons * no. of specifications * no. of income quartiles
assert _N == ((`unique_april' * `unique_january') + (`unique_april' * `unique_march') + (`unique_january' * `unique_march')) * 2 * 2
assert !missing(diff)

* Save differences 
save "${root}/data/derived/Policy/stimulus_permutation_differences.dta", replace 
project, creates("${root}/data/derived/Policy/stimulus_permutation_differences.dta")

* Merge in the actual estimate 
project, uses("${root}/data/derived/stimulus_dollar_estimate_diff.dta")
merge m:1 specification income_q using "${root}/data/derived/stimulus_dollar_estimate_diff.dta", assert(3) nogen 

*---------------------------
* Histogram
*---------------------------

foreach comparison in "Stim 1 v 2" "Stim 1 v 3" "Stim 2 v 3" {
	
	local first_stim = substr("`comparison'", 6, 1)
	local second_stim = substr("`comparison'", 10, 1)
	
	foreach income_q in 1 4 {
		foreach spec in "detrended" "original" {
			
			if "`spec'" == "detrended" local graph_title ""
			else if "`spec'" == "original" local graph_title " original"
	
			preserve
	
			keep if comparison == "`comparison'" & specification == "`spec'" & income_q == `income_q'
			local total = _N
			assert `total' <= 10000
	
			* Get true diff 
			sum stim`first_stim'v`second_stim'
			local true_diff = r(mean)
			local true_diff_round: di %4.1f r(mean)
			
			* right side pvalue 
			count if diff > `true_diff'
			local count = r(N) 
			local frac = `count' / `total' 

			* left side pvalue
			count if diff < `true_diff'
			local count_left = r(N) 
			local frac_left = `count_left' / `total' 

			* two-sided pvalue
			local frac2 = 2 * min(`frac', `frac_left')

			* Round p-values (after and not before adding)
			local frac: di %04.3f `frac'
			local frac_left: di %04.3f `frac_left'
			local frac2 : di %04.3f `frac2'

			sum diff 
			local stim_sd: di %4.1f `r(sd)'

			* find 95% CI 
			sort diff
			sum diff if _n == floor(`total' * 0.025)
			local left_95_CI: di %4.1f r(mean)
			sum diff if _n == ceil(`total' * 0.975)
			local right_95_CI: di %4.1f r(mean)

			if income_q == 1 {
				
				tw ///
				(hist diff, bin(20) frac color(oi1)) ///
				, ///
				xtitle("") ///  
				ytitle("Fraction of Estimates") /// 
				text(0.15 -375 "S.D. = `stim_sd'", size(small) color(black)) ///
				text(0.14 -375 "95% CI = [`left_95_CI', `right_95_CI']", size(small) color(black)) ///
				text(0.13 -375 "Two-sided p-val = `frac2'", size(small) color(black)) ///
				text(0.16 `=`true_diff_round'' "Stim `first_stim' vs. `second_stim' Actual Effect:" "`true_diff_round'", color(black) size(small)) ///
				xline(`true_diff_round', lcolor(gs8) lpattern(dash) noextend) ///
				xtitle("Triple-Diff Estimate for Effect on Consumer Spending per $1200 of Stimulus") ///
				ylab(0 "0" 0.05 "0.05" 0.1 "0.1" 0.15 "0.15", nogrid) ///
				yscale(range(0 0.15)) ///
				xsc(ra(-500 550)) ///
				title(" ", size(huge)) ///
				xlab(-500 "-$500" -250 "-$250" 0 "$0" 250 "$250" 500 "$500") 

				oi_graph_export "${root}/results/Policy/histogram permutation triple-diff Stim`first_stim'v`second_stim' Q1`graph_title'", type(${fig_type}) 
			}
	
			if income_q == 4 {
				
				tw ///
				(hist diff, bin(20) frac color(oi2)) ///
				, ///
				xtitle("") ///  
				ytitle("Fraction of Estimates") /// 
				text(0.15 -750 "S.D. = `stim_sd'", size(small) color(black)) ///
				text(0.14 -750 "95% CI = [`left_95_CI', `right_95_CI']", size(small) color(black)) ///
				text(0.13 -750 "Two-sided p-val = `frac2'", size(small) color(black)) ///
				text(0.16 `=`true_diff_round'' "Stim `first_stim' vs. `second_stim' Actual Effect:" "`true_diff_round'", color(black) size(small)) ///
				xline(`true_diff_round', lcolor(gs8) lpattern(dash) noextend) ///
				xtitle("Triple-Diff Estimate for Effect on Consumer Spending per $1200 of Stimulus") ///
				ylab(0 "0" 0.05 "0.05" 0.1 "0.1" 0.15 "0.15", nogrid) ///
				yscale(range(0 0.15)) ///
				xsc(ra(-1000 1050)) ///
				title(" ", size(huge)) ///
				xlab(-1000 "-$1000" -500 "-$500" 0 "$0" 500 "$500" 1000 "$1000") 
		
				oi_graph_export "${root}/results/Policy/histogram permutation triple-diff Stim`first_stim'v`second_stim' Q4`graph_title'", type(${fig_type}) 
		}
		
		*-------------------------------------------------------------------------------
		* Export output numbers to csv file - only for detrended values 
		
		if "`spec'" == "detrended" {
			cap erase "${root}/results/paper numbers/`category'/Triple-diff Stim `first_stim' vs Stim `second_stim' permutation for Q`income_q'.yaml"
		
			yamlout using "${root}/results/paper numbers/`category'/Triple-diff Stim `first_stim' vs Stim `second_stim' permutation for Q`income_q'.yaml", ///
				key("stim`first_stim'v`second_stim'_q`income_q'") ///
				comment("Stim `first_stim' vs `second_stim'") ///
				value(`true_diff_round') fmt(%9.2f)
			yamlout using "${root}/results/paper numbers/`category'/Triple-diff Stim `first_stim' vs Stim `second_stim' permutation for Q`income_q'.yaml", ///
				key("stim`first_stim'v`second_stim'_sd_q`income_q'") ///
				comment("SD") ///
				value(`stim_sd') fmt(%9.2f)
			yamlout using "${root}/results/paper numbers/`category'/Triple-diff Stim `first_stim' vs Stim `second_stim' permutation for Q`income_q'.yaml", ///
				key("stim`first_stim'v`second_stim'_rpval_q`income_q'") ///
				comment("Right-sided p-val") ///
				value(`frac') fmt(%9.3f)
			yamlout using "${root}/results/paper numbers/`category'/Triple-diff Stim `first_stim' vs Stim `second_stim' permutation for Q`income_q'.yaml", ///
				key("stim`first_stim'v`second_stim'_ci_q`income_q'") ///
				comment("CI") ///
				value("[`left_95_CI', `right_95_CI']") 
			yamlout using "${root}/results/paper numbers/`category'/Triple-diff Stim `first_stim' vs Stim `second_stim' permutation for Q`income_q'.yaml", ///
				key("stim`first_stim'v`second_stim'_pval_q`income_q'") ///
				comment("Two-sided p-val") ///
				value(`frac2') fmt(%9.3f)
				
			project, creates("${root}/results/paper numbers/`category'/Triple-diff Stim `first_stim' vs Stim `second_stim' permutation for Q`income_q'.yaml")
		}

		restore
		
		* Scalars for bar chart - two-sided p-val
		local pval_stim`first_stim'v`second_stim'_q`income_q'_`spec' = `frac2'
		}
	}
} 

*-------------------------------------------------------------------------------
* Save p-values for bar chart
clear
set obs 4

gen income_q = .
gen specification = ""
gen pval_stim1v2 = .
gen pval_stim1v3 = .
gen pval_stim2v3 = .

replace income_q = 1 in 1 
replace specification = "detrended" in 1
replace pval_stim1v2 = `pval_stim1v2_q1_detrended' in 1
replace pval_stim1v3 = `pval_stim1v3_q1_detrended' in 1
replace pval_stim2v3 = `pval_stim2v3_q1_detrended' in 1 

replace income_q = 4 in 2 
replace specification = "detrended" in 2
replace pval_stim1v2 = `pval_stim1v2_q4_detrended' in 2
replace pval_stim1v3 = `pval_stim1v3_q4_detrended' in 2
replace pval_stim2v3 = `pval_stim2v3_q4_detrended' in 2

replace income_q = 1 in 3 
replace specification = "original" in 3
replace pval_stim1v2 = `pval_stim1v2_q1_original' in 3
replace pval_stim1v3 = `pval_stim1v3_q1_original' in 3
replace pval_stim2v3 = `pval_stim2v3_q1_original' in 3 

replace income_q = 4 in 4 
replace specification = "original" in 4
replace pval_stim1v2 = `pval_stim1v2_q4_original' in 4
replace pval_stim1v3 = `pval_stim1v3_q4_original' in 4
replace pval_stim2v3 = `pval_stim2v3_q4_original' in 4


* Export
export excel "${root}/results/stim_pvals.xlsx", sheet(stim_pvals, replace) firstrow(variables)
project, creates("${root}/results/stim_pvals.xlsx")

